import os
import time
import json
import csv
from datetime import datetime
from openai import OpenAI
import pytz
from typing import List, Dict, Any
from tqdm import tqdm  

class DeepSeekCoTGenerator:
    def __init__(self, *, api_key=None, base_url=None,
                 output_file: str = "xxx.json", temp_file: str = "xxx.json"):
        """Create the API client and reset the running usage counters.

        Every argument is keyword-only and optional, so calling the
        constructor with no arguments keeps working.

        Args:
            api_key: Credential passed to the OpenAI-compatible client. When
                omitted, the ``DEEPSEEK_API_KEY`` environment variable is
                used, falling back to the in-source placeholder.
            base_url: Endpoint passed to the client. When omitted, the
                ``DEEPSEEK_BASE_URL`` environment variable is used, falling
                back to the in-source placeholder.
            output_file: Path written by the final (non-batch) save.
            temp_file: Path written by the periodic batch saves.

        Raises:
            Exception: Whatever the client constructor raises; the error is
                printed and then re-raised unchanged.
        """
        # Recorded before anything that can fail so that __del__, which only
        # checks for this attribute, can still report the elapsed time.
        self.start_time = time.time()
        try:
            # Prefer explicit arguments, then the environment, so credentials
            # do not have to be committed to the source file.
            self.client = OpenAI(
                api_key=api_key or os.environ.get("DEEPSEEK_API_KEY", "xxx"),
                base_url=base_url or os.environ.get("DEEPSEEK_BASE_URL", "xxx"),
            )
        except Exception as e:
            print(f"Initialization failed: {str(e)}")
            raise

        # Running totals, updated by _show_stats after every API call.
        self.total_prompt_tokens = 0
        self.total_completion_tokens = 0
        self.total_cost = 0.0
        self.total_saved = 0  # Track total saved records
        self.output_file = output_file
        self.temp_file = temp_file  # Temporary file for periodic saves

    def _get_beijing_time(self) -> datetime:
        """Return the current time as an aware datetime in Asia/Shanghai.

        Returns:
            A timezone-aware ``datetime`` converted to the ``Asia/Shanghai``
            zone.
        """
        # datetime.utcnow() is deprecated since Python 3.12 and returns a
        # naive value that then has to be patched with replace(tzinfo=...).
        # Asking for an aware UTC timestamp directly gives the same instant.
        return datetime.now(pytz.utc).astimezone(pytz.timezone('Asia/Shanghai'))

    def _calculate_cost(self, prompt_tokens: int, completion_tokens: int) -> float:
        """Price one request according to the current Beijing time of day.

        Between minute 30 (inclusive) and minute 510 (exclusive) of the day,
        i.e. 00:30 up to but not including 08:30, the rates are 1 per million
        input tokens and 4 per million output tokens. At any other time they
        are 4 and 16 per million respectively.

        Args:
            prompt_tokens: Number of input tokens billed for the request.
            completion_tokens: Number of output tokens billed for the request.

        Returns:
            The cost of the request as a float.
        """
        now = self._get_beijing_time()
        minute_of_day = now.hour * 60 + now.minute

        in_reduced_window = 30 <= minute_of_day < 510
        per_input, per_output = (
            (1 / 1_000_000, 4 / 1_000_000)
            if in_reduced_window
            else (4 / 1_000_000, 16 / 1_000_000)
        )

        return (prompt_tokens * per_input) + (completion_tokens * per_output)
    
    def _show_stats(self, usage: dict, cost: float):
        self.total_prompt_tokens += usage.prompt_tokens
        self.total_completion_tokens += usage.completion_tokens
        self.total_cost += cost

        print(f"\n[Current Stats]")
        print(f"Input tokens: {usage.prompt_tokens} | Output tokens: {usage.completion_tokens}")
        print(f"Current cost: ¥{cost:.6f}")

        print(f"\n[Total Stats]")
        print(f"Total input tokens: {self.total_prompt_tokens}")
        print(f"Total output tokens: {self.total_completion_tokens}")
        print(f"Total cost: ¥{self.total_cost:.6f}\n")
    
    def _save_results(self, results: List[Dict[str, Any]], batch_mode: bool = False):
        """Save results to file, with batch mode support"""
        if batch_mode:
            # In batch mode, append to temp file
            with open(self.temp_file, 'w', encoding='utf-8') as f:
                json.dump(results, f, ensure_ascii=False, indent=2)
        else:
            # Final save to main output file
            with open(self.output_file, 'w', encoding='utf-8') as f:
                json.dump(results, f, ensure_ascii=False, indent=2)
    
    def generate_long_cot(self, text: str, label: int, num_samples: int = 5) -> Dict[str, Any]:
        """Generate CoT responses with different reasoning patterns in English"""
        valid_responses = []
        sentiment_mapping = {
            0: "neutral",
            1: "positive",
            2: "negative"
        }
        sentiment_definition = (
            "0=neutral: no clear emotional cues | "
            "1=positive: features like positive lexicon, uplifting emojis, achievement expressions | "
            "2=negative: contains negative elements, expressions of unpleasant events"
        )
        for i in range(num_samples):
            try:
                if i < 2:  
                    reasoning_type = "non-linear"
                    length = "Long"
                    prompt = (
                        f"""Perform rigorous sentiment analysis by dynamically applying selected reasoning methods. Use the following framework (choose steps, order, and iterations as needed):
                        【Reasoning Framework】
                          Decomposition: Break down text elements (semantics/context/rhetoric)
                          Reflection: Question initial assumptions and verify their rationality
                          Verification: Cross-check logical consistency
                          Transition: Handle contradictory information (using "however" - like analysis)
                          Retry: Correct the reasoning path when errors are found
    
                          Process Requirements:
                          → Must include ≥ 5 reasoning steps, freely combining the above components, without limitation on the number of times or order, and also free to explore other reasoning methods.
                          → Must analyze the sentiment of this text using non-linear reasoning (e.g., Tree/Graph of Thought).
                          → Each step must clearly indicate the type of reasoning used (e.g., 【Step 1 - Decomposition】).
                          → At least two verification stages must be included:
                            - Preliminary conclusion verification
                            - Final decision verification
                          → Contradictions in the text must be addressed (demonstrating the use of "however" - like analysis).
                          → Error correction must show the complete adjustment of the reasoning path.
                          → Final conclusion must align with: {sentiment_definition}
    
                          Error Checkpoints:
                          ✓ Sentiment intensity validation
                          ✓ Context-text consistency check
                          ✓ Emoji-semantic alignment verification
    
                          Tweet content: "{text}"
                          Conclude with "Therefore, the sentiment label is: {label}" (0=neutral,1=positive,2=negative)"""
                    )
                elif i == 2:  
                    reasoning_type = "linear"
                    length = "Short"
                    prompt = (
                        f"Analyze the sentiment of this text step by step. Requirements:\n"
                        f"1. Use simple chain of thought without specific reasoning process\n"
                        f"2. Final conclusion must align with: {sentiment_definition}\n"
                        f"3. Text content: {text}\n"
                        f"4. Conclude with Therefore, the sentiment label is: {label} (0=neutral,1=positive,2=negative)\n"
                    )
                elif i == 3: 
                    reasoning_type = "linear"
                    length = "Long"
                    prompt = (
                        f"""Perform rigorous sentiment analysis by dynamically applying selected reasoning methods. Use the following framework (choose steps, order, and iterations as needed):
                        【Reasoning Framework】
                          Decomposition: Break down text elements (semantics/context/rhetoric)
                          Reflection: Question initial assumptions and verify their rationality
                          Verification: Cross-check logical consistency
                          Transition: Handle contradictory information (using "however" - like analysis)
                          Retry: Correct the reasoning path when errors are found
    
                          Process Requirements:
                          → Must include ≥ 5 reasoning steps, freely combining the above components, without limitation on the number of times or order, and also free to explore other reasoning methods.
                          → Must explore at least TWO different paths using 'alternatively...' comparisons"
                          → Each step must clearly indicate the type of reasoning used (e.g., 【Step 1 - Decomposition】).
                          → At least two verification stages must be included:
                            - Preliminary conclusion verification
                            - Final decision verification
                          → Contradictions in the text must be addressed (demonstrating the use of "however" - like analysis).
                          → Error correction must show the complete adjustment of the reasoning path.
                          → Final conclusion must align with: {sentiment_definition}
    
                          Error Checkpoints:
                          ✓ Sentiment intensity validation
                          ✓ Context-text consistency check
                          ✓ Emoji-semantic alignment verification
    
                          Tweet content: "{text}"
                          Conclude with "Therefore, the sentiment label is: {label}" (0=neutral,1=positive,2=negative)"""
                    )
                else:  
                    reasoning_type = "linear"
                    length = "Long"
                    prompt = (
                        f"""Perform rigorous sentiment analysis by dynamically applying selected reasoning methods. Use the following framework (choose steps, order, and iterations as needed):
                        【Reasoning Framework】
                          Decomposition: Break down text elements (semantics/context/rhetoric)
                          Reflection: Question initial assumptions and verify their rationality
                          Verification: Cross-check logical consistency
                          Transition: Handle contradictory information (using "however" - like analysis)
                          Retry: Correct the reasoning path when errors are found
    
                          Process Requirements:
                          → Must include ≥ 5 reasoning steps, freely combining the above components, without limitation on the number of times or order, and also free to explore other reasoning methods.
                          → Each step must clearly indicate the type of reasoning used (e.g., 【Step 1 - Decomposition】).
                          → At least two verification stages must be included:
                            - Preliminary conclusion verification
                            - Final decision verification
                          → Contradictions in the text must be addressed (demonstrating the use of "however" - like analysis).
                          → Error correction must show the complete adjustment of the reasoning path.
                          → Final conclusion must align with: {sentiment_definition}
    
                          Error Checkpoints:
                          ✓ Sentiment intensity validation
                          ✓ Context-text consistency check
                          ✓ Emoji-semantic alignment verification
    
                          Tweet content: "{text}"
                          Conclude with "Therefore, the sentiment label is: {label}" (0=neutral,1=positive,2=negative)"""
                    )

                response = self.client.chat.completions.create(
                    model="xxx",
                    messages=[
                        {"role": "system", "content": "You are an expert in complex sentiment analysis with multi-step reasoning."},
                        {"role": "user", "content": prompt}
                    ],
                    temperature=0.7,
                    max_tokens=8192,
                    stream=False
                )
                
                reply = response.choices[0].message.content
                usage = response.usage
                cost = self._calculate_cost(usage.prompt_tokens, usage.completion_tokens)
                self._show_stats(usage, cost)
        
                if f"sentiment label is: {label}" in reply.lower():
                    valid_responses.append({
                        "content": reply,
                        "reasoning_type": reasoning_type,
                        "length": length
                    })
                
            except Exception as e:
                print(f"Error generating CoT: {str(e)}")
                continue
        
        return {
            "text": text,
            "Label": label,
            "CoT_Responses": valid_responses
        }

    def load_dataset_from_csv(self, file_path: str) -> List[Dict[str, Any]]:
        """Load dataset from CSV with sentiment labels as integers"""
        dataset = []
        sentiment_mapping = {
            "neutral": 0,
            "positive": 1,
            "negative": 2
        }
        
        try:
            with open(file_path, mode='r', encoding='utf-8') as file:
                reader = csv.DictReader(file)
                for row in reader:
                    sentiment = row["Sentiment"].strip().lower()
                    sentiment_id = sentiment_mapping.get(sentiment, -1)
                    
                    if sentiment_id == -1:
                        print(f"Warning: Unknown sentiment '{row['Sentiment']}' in row {row.get('Sr No.', 'N/A')}, skipping...")
                        continue

                    dataset.append({
                        "id": row.get("Sr No.", len(dataset) + 1),
                        "class": sentiment_id,  
                        "text": row["Utterance"]
                    })
            print(f"Loaded {len(dataset)} records from {file_path}")
        except Exception as e:
            print(f"Error loading dataset: {str(e)}")
            raise
        return dataset

    def process_dataset(self, input_csv: str, start_index: int = 0):
        """Generate CoT records for a CSV dataset, checkpointing as it goes.

        Each dataset row is passed to ``generate_long_cot``. Every accepted
        response becomes one output record with keys ``text``, ``CoT``,
        ``Label``, ``ReasoningType`` and ``Length``. All records collected so
        far are written to the temporary file after every fifth row position
        and after the last row; the final list is written to the output file
        once the loop finishes.

        Args:
            input_csv: Path of the CSV file to load.
            start_index: Number of leading rows to skip, e.g. to continue an
                earlier run. Defaults to 0 (process every row). Values outside
                ``0..len(dataset)`` are clamped.
                NOTE(review): records from skipped rows are not reloaded, so
                the saved files contain only what this run produced — confirm
                this is acceptable when resuming.
        """
        dataset = self.load_dataset_from_csv(input_csv)
        total = len(dataset)
        start_index = max(0, min(start_index, total))
        # Slice to actually skip rows: enumerate's start argument only offsets
        # the counter and never skips elements.
        pending = dataset[start_index:]
        results = []

        with tqdm(total=len(pending), desc="Processing Items", unit="item") as pbar:
            # position is the 1-based row number within the full dataset, so
            # "position/total" and the percentage below stay within range.
            for position, item in enumerate(pending, start_index + 1):
                print(f"\nProcessing item {item['id']} ({position}/{total})...")
                result = self.generate_long_cot(item["text"], item["class"])

                for cot in result["CoT_Responses"]:
                    record = {
                        "text": result["text"],
                        "CoT": cot["content"],
                        "Label": result["Label"],
                        "ReasoningType": cot["reasoning_type"],
                        "Length": cot["length"]
                    }
                    results.append(record)
                    self.total_saved += 1

                    print(f"\nSaved record {len(results)} (Total saved: {self.total_saved})")
                    print(json.dumps(record, indent=2, ensure_ascii=False))

                pbar.update(1)

                # Checkpoint every fifth row and always after the last one.
                if position % 5 == 0 or position == total:
                    self._save_results(results, batch_mode=True)
                    print("\n=== Progress Update ===")
                    print(f"Saved batch of {len(results)} records (Total saved: {self.total_saved})")
                    print(f"Processed {position}/{total} items ({position/total*100:.1f}%)")
                    print(f"Temporary results saved to {self.temp_file}")

        self._save_results(results)
        print(f"\nProcessing completed. Results saved to {self.output_file}")
        print(f"Total valid responses: {self.total_saved}")
    
    def __del__(self):
        if hasattr(self, 'start_time'):
            total_time = time.time() - self.start_time
            hours, remainder = divmod(total_time, 3600)
            minutes, seconds = divmod(remainder, 60)
            print(f"\nTotal runtime: {int(hours):02d}:{int(minutes):02d}:{int(seconds):02d}")

if __name__ == "__main__":
    # Script entry point: build the generator and process the configured CSV.
    try:
        cot_generator = DeepSeekCoTGenerator()
        input_csv = "xxx.csv"
        cot_generator.process_dataset(input_csv)
    except Exception as e:
        print(f"System error: {str(e)}")
        # Exit with a non-zero status so shells and schedulers can tell the
        # run failed; previously the error was printed and the exit code was 0.
        raise SystemExit(1)